* Run this do-file after 1_Table_2\6_netincdiff.do.
version 13

* Start from an empty session, make text the default log format and switch off
* the more prompt so the script runs without pausing.
clear all
set logtype text
set more off
* Close a log left open by an earlier run; capture suppresses the error that
* log close raises when no log is open.
capture log close
* NOTE(review): the log file name carries a .smcl suffix although the default
* log type was just set to text -- confirm which format is intended.
log  using  .../AA_7a_un_12Sep2020.smcl, replace

* Load the input data, replacing whatever is in memory.
use .../left_cens_spell_10sample_10never_type_1_Sept2020_hmle.dta, clear
* Restrict the sample to 1996 onwards; all spell variables are rebuilt below
* for this restricted sample. The original note gives the window as 1996-2011,
* but only the lower bound is applied here.
keep if year>=1996

* The _1 suffix marks the labour-market status definition used here, which
* keeps the option of running the same code for a second group later.
rename lm_status lm_status_1

* Remove the spell variables inherited from the input file so that they can be
* regenerated below. last_yr_smpl is not part of this list (it was commented
* out of the original list as well).
drop lcensor lcens_id lcens_spell lastsp_unem ban a_spell a_seq a_end dur ///
    last_dur_yr last_yr r_c durvar


*** Split each history into spells of constant lm_status_1
* tsspell is not an official Stata command and must be installed separately.
sort id year
tsset id year, yearly
tsspell lm_status_1, spell(a_spell_1) seq(a_seq_1) end(a_end_1)

label variable a_spell_1 "definition 1: spell number from the first spell onwards"
label variable a_seq_1 "definition 1: duration - going from 1,2,3 etc"
label variable a_end_1 "definition 1: dummy for when the spell ends"

*** Left-censoring indicators
* A spell counts as left-censored when it is the first spell of the person, has
* status 1 or 2 (self employment or wage income, per the original note) and
* starts in 1996, the first year kept in the sample. Spells that start later
* are presumably entries after school.
*   lcensor_1     : 1 on the first observation of such a spell
*   lcens_id_1    : 1 on every observation of a person with such a spell
*   lcens_spell_1 : 1 on every observation of the left-censored spell itself
bysort id (year): gen lcensor_1 = (inlist(lm_status_1, 1, 2) & a_spell_1==1 & a_seq_1==1 & year==1996)
bysort id (year): egen lcens_id_1 = max(lcensor_1)
bysort id a_spell_1: egen lcens_spell_1 = max(lcensor_1)

*** Exclude unemployment observations (lm_status_1==0)
* ban_1 marks observations to exclude. Unemployment in the first or last spell
* of a person is dropped right away; people with unemployment between
* employment spells are flagged in full and removed later, when all
* observations with ban_1==1 are dropped.

* Highest spell number per person, used to recognise the last spell.
bysort id (year): egen numbspells_1 = max(a_spell_1)

* Record at person level that the last spell was unemployment.
gen lsp_un_1 = (lm_status_1==0 & a_spell_1==numbspells_1)
bysort id (year): egen lastsp_unem_1 = max(lsp_un_1)

* Ban the last spell if unemployed. The helper is dropped under its full name
* so that this does not depend on variable abbreviation being enabled.
gen ban_1 = lsp_un_1
drop lsp_un_1

* Also ban the first spell if unemployed, then remove both kinds of observation.
* (The drop was changed on 11.06.19 according to the original note.)
replace ban_1 = 1 if lm_status_1==0 & a_spell_1==1
drop if ban_1==1

* Check how many individuals have unemployment between wage and/or self
* employment spells:
*   d  = unemployed now, previous observation of the same person has status 1 or 2
*   dd = unemployed now, next observation of the same person has status 1 or 2
* By-group subscripts are used so that the neighbour is always taken from the
* same person in year order; beyond the first or last observation of a person
* the subscripted value is missing and the test is false.
bysort id (year): gen d  = (lm_status_1==0 & inlist(lm_status_1[_n-1], 1, 2))
bysort id (year): gen dd = (lm_status_1==0 & inlist(lm_status_1[_n+1], 1, 2))
bysort id (year): egen d1 = max(d)
bysort id (year): egen dd1 = max(dd)

* Ban every observation of people who show both patterns (dropped for the
* moment). The original note records a share of about
* 115471 / 515745 = .22389165 here; what is being counted is not stated.
replace ban_1 = 1 if dd1==1 & d1==1
drop d dd d1 dd1

* Finally ban every remaining unemployment observation.
replace ban_1 = 1 if lm_status_1==0


*** Ban people whose status changes every year or nearly every year

* Rebuild the spells using only the observations that are not banned.
drop numbspells_1 a_end_1 a_seq_1 a_spell_1
tsset id year, yearly
tsspell lm_status_1 if ban_1==0, spell(a_spell_1) seq(a_seq_1) end(a_end_1)

* Number of spells and number of observations per person.
bysort id (year): egen numbspells_1 = max(a_spell_1)
bysort id (year): gen numbobs = _N

* drop1: as many spells as observations, with at least 2 observations.
gen drop1 = ((numbspells_1==numbobs) & numbobs>=2)

* drop2-drop7: the number of spells is within 1, 2, ... 6 of the number of
* observations, each rule applying from a minimum number of observations
* (5, 7, 9, 10, 11 and 12 respectively).
local slack = 1
foreach minobs of numlist 5 7 9 10 11 12 {
    local rule = `slack' + 1
    gen drop`rule' = ((numbspells_1>=numbobs-`slack') & numbobs>=`minobs')
    local ++slack
}

* Anyone caught by at least one rule is banned.
replace ban_1 = 1 if drop1 | drop2 | drop3 | drop4 ///
    | drop5 | drop6 | drop7

drop drop* numbobs numbspells_1

*** Right-censoring indicator and duration variable

* ban_1 changed above, so the spells are rebuilt once more on the observations
* that are still allowed.
drop a_end_1 a_seq_1 a_spell_1
tsset id year, yearly
tsspell lm_status_1 if ban_1==0, spell(a_spell_1) seq(a_seq_1) end(a_end_1)

* dur_1: length of the spell. last_dur_yr_1: calendar year of the final
* observation of the spell (the helper is 0 on all other observations so that
* the maximum picks that year).
bysort id a_spell_1: egen dur_1 = max(a_seq_1)
bysort id a_spell_1: gen l_dur_yr_1 = cond(a_seq_1==dur_1 & year!=., year, 0)
bysort id a_spell_1: egen last_dur_yr_1 = max(l_dur_yr_1)
drop l_dur_yr_1

* last_yr_1: year on the last observation of the person in spell and sequence
* order, left missing when that observation is banned.
* last_yr_smpl_1 spreads that year to every observation of the person.
bysort id (a_spell_1 a_seq_1): gen last_yr_1 = cond(_n==_N & ban_1!=1, year, .)
bysort id: egen last_yr_smpl_1 = max(last_yr_1)

* r_c_1 = 1 when the spell ends in the last year observed for the person,
* i.e. the spell is right-censored.
bysort id a_spell_1: gen r_c_1 = (last_dur_yr_1==last_yr_smpl_1)

**** duration variable
* 0 on every observation before the end of a spell; on the final observation
* of a spell it is 1 if the spell is not right-censored and 0 if it is.
* Banned observations at the end of their spell stay missing; they are dropped
* just below.
gen durvar_1 = cond(a_seq_1!=dur_1, 0, cond(ban_1==0, r_c_1==0, .))

keep if ban_1!=1
drop ban_1

* Give the rebuilt variables their final names by removing the _1 suffix.
* last_yr_smpl_1 is not in the list and keeps its suffix.
local rebuilt lastsp_unem a_spell a_seq a_end last_dur_yr r_c ///
    lcens_spell lcensor lcens_id dur durvar last_yr
foreach m of newlist `rebuilt' {
    rename `m'_1 `m'
}

* One-way tabulations of the rebuilt variables for each combination of sedum
* and lcens_spell, in the order (1,0) (0,0) (1,1) (0,1).
* NOTE(review): last_yr_smpl was not renamed from last_yr_smpl_1. tab1 picks up
* either a variable with exactly that name carried over from the input file
* or, through variable abbreviation, last_yr_smpl_1 -- confirm which is meant.
local checkvars year lastsp_unem a_spell a_seq a_end last_dur_yr r_c ///
    lcens_spell lcensor lcens_id dur durvar last_yr last_yr_smpl
foreach censored of numlist 0 1 {
    foreach se of numlist 1 0 {
        tab1 `checkvars' if sedum==`se' & lcens_spell==`censored'
    }
}

* Store the result and close the log.
save .../left_cens_spell_10sample_10never_type_1_rev1_hmle_1996_Sep2020.dta , replace
log close
